Bruvo.distance <- function(genotype1, genotype2, maxl=9, usatnt=2, missing=-9) {

require(combinat) # for permn

if(identical(genotype1, genotype2)) {dist <- 0} else {
    if((length(genotype1)>maxl & length(genotype2)>maxl)|
       genotype1[1]==missing|genotype2[1]==missing){
        dist <- NA} else {
        if(length(genotype1) >= length(genotype2)) {
            genotypeL <- genotype1/usatnt; genotypeS <- genotype2/usatnt} else {
                genotypeL <- genotype2/usatnt; genotypeS <- genotype1/usatnt}

# if genotypes are identical, just return a distance of zero without doing the rest
        # of the calculation
# if genotypes are both longer than 9, skip this calculation because it will take hours
# whichever genotype has more alleles, make this genotypeL (long) and the other
        # genotypeS (short)
# convert alleles into repeat counts by dividing by usatnt

kl <- length(genotypeL) # sets the ploidy level for this genotype comparison
ks <- length(genotypeS) # number of alleles in the shorter genotype

allele.distances <- array(0 , c(kl,ks))
# Create an empty matrix to contain the raw distances between alleles

for(n in 1:kl) { for(m in 1:ks) {allele.distances[n,m] <- genotypeL[n] - genotypeS[m]}}
# fills the array with the differences in allele repeat count

geometric.distances <- array(1 - 2^-abs(allele.distances) , c(kl,ks))
# geometric transformation based on mutation probabilities

#Next, find the minimum distance sum among all permutations

column <- 1:ks # an index of all columns (genotypeS alleles)
row <- 1:kl # an index of all rows (genotypeL alleles)

combinations <- combn(row, ks, FUN = NULL, simplify=FALSE)
# all combinations of alleles in genotypeL that can be matched to non-virtual
        # alleles in genotypeS

permutations <- permn(ks)
# all possible orders that alleles within these combinations can go in

mindist <- Inf # this variable will store the minimum sum encountered so far.

for(i in 1:length(combinations)) {
# the loop to go through every possible sum of compatible allele comparisons

rowcomb <- combinations[[i]] # choose one combination of rows for this round

for(l in 1:length(permutations)){ # go through all orders of this combinations of rows

sum <- 0 # this is si, the sum of allele comparisons

for(j in 1:ks){
    sum <- sum + geometric.distances[rowcomb[permutations[[l]][j]],column[j]]}
# the loop to calculate the sum for this permutation

if(sum < mindist) {mindist <- sum} # is this the minimum sum found so far?

}}

dist <- (mindist+kl-ks)/kl
# add 1 for each infinite virtual allele, then divide by the ploidy

}}
return(dist)
}

Lynch.distance<-function(genotype1,genotype2,usatnt=NA,missing=-9){
    if(genotype1[1]==missing || genotype2[1]==missing){
        # return NA if there is any missing data
        distance<-NA
    } else {
        # get the average number of bands for the two genotypes
        meanbands<-(length(genotype1)+length(genotype2))/2
        # find how many bands the genotypes have in common
        commonbands<-length(genotype1[genotype1 %in% genotype2])
        # calculate the distance
        distance<- 1-(commonbands/meanbands)
    }

    # return distance
    return(distance)
}


meandistance.matrix <- function(object, samples=Samples(object),
                                 loci=Loci(object), all.distances=FALSE,
                                 distmetric=Bruvo.distance,
                                progress=TRUE, ...){
    # subset the object so that samples can be numbered
    object <- object[samples, loci]

# create an array containing all distances by locus and sample
    loci.matrices<-array(dim=c(length(loci),length(samples),length(samples)),
                         dimnames=list(loci,samples,samples))
    for(L in loci){
       for(m in 1:length(samples)){
           for(n in m:length(samples)){
               thisdistance <- distmetric(Genotype(object, m, L),
                                          Genotype(object, n, L),
                                          usatnt = Usatnts(object)[L],
                                          missing = Missing(object),
                                          ...)
               loci.matrices[L,m,n] <- thisdistance
               loci.matrices[L,n,m] <- thisdistance
               if(progress) print(c(L, samples[m], samples[n]))
           }
       }
    }

    # calculate the mean matrix across all loci
    mean.matrix<-matrix(nrow=length(samples),ncol=length(samples),
                        dimnames=list(samples,samples))
    for(j in samples){
        for(k in samples){
            mean.matrix[j,k]<-mean(loci.matrices[,j,k][!is.na(loci.matrices[,j,k])])
        }
    }

    # return either the mean matrix and possibly the array as well
    if(all.distances){
        return(list(DistByLoc=loci.matrices, MeanMatrix=mean.matrix))
    } else {
        return(mean.matrix)
    }
}

meandist.from.array<-function(distarray, samples=dimnames(distarray)[[2]],
                              loci=dimnames(distarray)[[1]]){
    # get the array to be averaged
    subarray<-distarray[loci,samples,samples]
    # make a matrix to put the means into
    mean.matrix<-matrix(nrow=length(samples),ncol=length(samples),
                        dimnames=list(samples,samples))
    for(j in samples){
        for(k in samples){
            mean.matrix[j,k]<-mean(subarray[,j,k][!is.na(subarray[,j,k])])
        }
    }
    return(mean.matrix)
}

find.na.dist<-function(distarray, samples=dimnames(distarray)[[2]],
                              loci=dimnames(distarray)[[1]]){
    # set up vectors for data frame to contain info on where missing data is
    Locus<-""
    Sample1<-""
    Sample2<-""
    # current row in the data frame
    currrow<-1
    # go through the array, find NA, and put the index into the data frame
    for(L in loci){
        for(s1 in samples){
            for(s2 in samples){
                if(is.na(distarray[L,s1,s2])){
                    Locus[currrow]<-L
                    Sample1[currrow]<-s1
                    Sample2[currrow]<-s2
                    currrow<-currrow+1
                }
            }
        }
    }
    #return data frame
    return(data.frame(Locus=Locus, Sample1=Sample1, Sample2=Sample2, stringsAsFactors=FALSE))
}

find.missing.gen<-function(object, samples=Samples(object),
                           loci=Loci(object)){
    # set up vectors to contain the indices to put into the data frame
    Locus <- c("")
    Sample <- c("")
    # current row in the data frame
    currrow<-1
    # find which data are missing
    for(L in loci){
        for(s in samples){
            if(isMissing(object, s, L)){
                Locus[currrow]<-L
                Sample[currrow]<-s
                currrow<-currrow+1
            }
        }
    }
    # return the data frame
    return(data.frame(Locus=Locus,Sample=Sample, stringsAsFactors=FALSE))
}

# find NA distances that aren't the result of missing data
find.na.dist.not.missing<-function(object, distarray,
                                   samples=dimnames(distarray)[[2]],
                                   loci=dimnames(distarray)[[1]]){
    # get the data frames of NA distances and missing genotypes
    na.dist<-find.na.dist(distarray,samples=samples,loci=loci)
    missing.gen<-find.missing.gen(object,samples=samples,loci=loci)
    # set up vectors for data frame to contain results
    Locus<-""
    Sample1<-""
    Sample2<-""
    currrow<-1
    # for each row of na.dist, look for that locus and samples in missing.gen
    for(i in 1:length(na.dist$Locus)){
        L<-na.dist$Locus[i]
        s1<-na.dist$Sample1[i]
        s2<-na.dist$Sample2[i]
        missthislocus<-missing.gen[missing.gen$Locus==L,]
        if(identical(c(s1,s2) %in% missthislocus$Sample, c(FALSE,FALSE))){
            Locus[currrow]<-L
            Sample1[currrow]<-s1
            Sample2[currrow]<-s2
            currrow<-currrow+1
        }
    }
    # return data frame
    return(data.frame(Locus=Locus, Sample1=Sample1, Sample2=Sample2,
                      stringsAsFactors=FALSE))
}
